1 Aggregated and atomic scores per method

2 Read the score files





# Locate the score files. When run from the framework root they live under
# ./output/scores/; otherwise they are expected in the current directory.
list_wd <- strsplit(getwd(), "/")[[1]]
score_files <- if (tail(list_wd, 1) == "hadaca3_framework") {
  list.files(path = "./output/scores/", full.names = TRUE)
} else {
  dir_ls(".", regexp = "score-li.*")
}

# Parallel backend used for the per-file processing below.
plan(multisession, workers = 4) # workers=25
# plan(sequential)


# Turn one late-integration score file into a data frame.
#
# The pipeline configuration is recovered from the file name, which must
# follow the layout
#   score-li-<dataset>_<ref>_mixRNA_<pp>_<fs>_RNA_<pp>_<fs>_scRNA_<pp>_<fs>_<de>
#     _mixMET_<pp>_<fs>_MET_<pp>_<fs>_<de>_<li>.h5
# and the scores themselves are loaded with read_hdf5().
#
# Args:
#   score_file: path to a score file.
#
# Returns:
#   The 15 name-derived columns column-bound to the object returned by
#   read_hdf5() (presumably a data frame of scores -- confirm against
#   read_hdf5), or NULL when the file name does not match the expected layout
#   or the file cannot be read.
process_file <- function(score_file) {
  base_name <- basename(score_file)

  # The dot before "h5" is escaped and the pattern is anchored at the end so
  # that only real ".h5" files are accepted (an unescaped "." matches any
  # character, e.g. "...xh5").
  name_pattern <- paste0(
    "score-li-(.+)_(.+)_mixRNA_(.+)_(.+)_RNA_(.+)_(.+)_scRNA_(.+)_(.+)_(.+)",
    "_mixMET_(.+)_(.+)_MET_(.+)_(.+)_(.+)_(.+)\\.h5$"
  )
  # Element 1 is the full match; elements 2..16 are the 15 capture groups.
  components <- str_match(base_name, name_pattern)[2:16]

  # If file name doesn't match expected pattern, skip
  if (any(is.na(components))) return(NULL)

  scores <- tryCatch({
    s <- read_hdf5(score_file)
    gc() # presumably to keep worker memory low between files
    s
  }, error = function(e) {
    # Log the text of the error. Passing the condition object itself to
    # message() would re-signal it to enclosing handlers.
    message("Error reading file: ", score_file)
    message(conditionMessage(e))
    NULL
  })

  if (is.null(scores)) return(NULL)

  cbind(
    data.frame(
      dataset = components[1],
      ref = components[2],
      preprocessing_mixRNA = components[3],
      feature_selection_mixRNA = components[4],
      preprocessing_RNA = components[5],
      feature_selection_RNA = components[6],
      preprocessing_scRNA = components[7],
      feature_selection_scRNA = components[8],
      deconvolution_rna = components[9],
      preprocessing_mixMET = components[10],
      feature_selection_mixMET = components[11],
      preprocessing_MET = components[12],
      feature_selection_MET = components[13],
      deconvolution_met = components[14],
      late_integration = components[15],
      stringsAsFactors = FALSE
    ),
    scores
  )
}

# Process files in parallel
# results_list <- lapply(score_files, process_file)

# Run process_file() on every score file using the configured future plan.
# A file that raises an unexpected error is reported and contributes NULL
# instead of aborting the whole run.
results_list <- future_map(score_files, function(f) {
  tryCatch(
    process_file(f),
    error = function(e) {
      message("Failed to process ", f, ": ", conditionMessage(e))
      NULL
    }
  )
})


# Names of the pipeline-step columns, in pipeline order.
all_functions_li <- c(
  # RNA branch
  "preprocessing_mixRNA",
  "feature_selection_mixRNA",
  "preprocessing_RNA",
  "feature_selection_RNA",
  "preprocessing_scRNA",
  "feature_selection_scRNA",
  "deconvolution_rna",
  # MET branch
  "preprocessing_mixMET",
  "feature_selection_mixMET",
  "preprocessing_MET",
  "feature_selection_MET",
  "deconvolution_met",
  # Integration of both branches
  "late_integration"
)

# Combine the per-file results into `results_li`, summarise them, and order
# the factor levels used downstream.
#
# results_list holds one entry per input file, with NULL for files that were
# skipped or failed. NULLs are dropped first so that "every file failed" is
# handled like "no file found" instead of crashing on rbind()'s NULL result.
valid_results <- Filter(Negate(is.null), results_list)

if (length(valid_results) != 0) {
  results_li <- do.call(rbind, valid_results)

  # Median aggregated score per late-integration method, best first.
  # Stored in a variable because a bare expression inside `if` is neither
  # printed nor kept.
  results_li_by_late_integration <- results_li %>%
    # filter(dc==2) %>%
    group_by(late_integration) %>%
    summarise(GlobalScore = median(score_aggreg)) %>%
    arrange(desc(GlobalScore))

  # Median aggregated score per full pipeline configuration, best first.
  # `.groups` is an argument of summarise(); given to group_by() it would
  # create an extra grouping column literally named ".groups".
  results_li_arrange <- results_li %>%
    group_by(across(all_of(all_functions_li))) %>%
    summarise(GlobalScore = median(score_aggreg), .groups = "keep") %>%
    arrange(desc(GlobalScore))

  # Optional: reorder factors (levels in order of first appearance)
  all_data_used <- c("dataset", "ref")
  for (data_used in all_data_used) {
    results_li[[data_used]] <- factor(
      results_li[[data_used]],
      levels = unique(results_li[[data_used]])
    )
  }

  # Optional: order other factors based on performance on 'invitro1'
  if ("invitro1" %in% results_li$dataset) {
    is_invitro1 <- results_li$dataset == "invitro1"
    # Ranking of the invitro1 rows only; it must be applied to that same
    # subset, not to the full column.
    invitro1_rank <- order(results_li$score_aggreg[is_invitro1],
                           decreasing = TRUE)
    for (fun in all_functions_li) {
      values <- as.character(results_li[[fun]])
      ranked_levels <- unique(values[is_invitro1][invitro1_rank])
      # Append values never seen on invitro1 so that they keep a level
      # instead of being turned into NA.
      results_li[[fun]] <- factor(
        values,
        levels = union(ranked_levels, unique(values))
      )
    }
  }
} else {
  # No usable score file: keep a non-data-frame placeholder, which later code
  # detects with is.data.frame().
  results_li <- all_functions_li
}
#> `summarise()` has grouped output by 'preprocessing_mixRNA',
#> 'feature_selection_mixRNA', 'preprocessing_RNA', 'feature_selection_RNA',
#> 'preprocessing_scRNA', 'feature_selection_scRNA', 'deconvolution_rna',
#> 'preprocessing_mixMET', 'feature_selection_mixMET', 'preprocessing_MET',
#> 'feature_selection_MET', 'deconvolution_met', 'late_integration'. You can
#> override using the `.groups` argument.

# dataset_names <- unique(results_li$dataset)
# Build the output name from the datasets present in the results, or fall
# back to a fixed name when no results were loaded.
dynamic_name_li <- if (is.data.frame(results_li)) {
  paste0("results_li_", paste(unique(results_li$dataset), collapse = "_"))
} else {
  "results_li_empty"
}

# Write compressed output
write.csv(
  results_li,
  file = gzfile(paste0(dynamic_name_li, ".csv.gz")),
  row.names = FALSE
)
# results_li = read.csv(file = gzfile("results_li.csv.gz"))
#> Warning in instance$preRenderHook(instance): It seems your data is too big for
#> client-side DataTables. You may consider server-side processing:
#> https://rstudio.github.io/DT/server.html

3 Early integration_table

4 Execute 08_metaanalysis.Rmd

# Render the meta-analysis report, evaluating it in the calling environment so
# that it can see the objects created above.
rmarkdown::render(
  input = "08_metaanalysis.Rmd",
  envir = parent.frame()
)
#> 
#> 
#> processing file: 08_metaanalysis.Rmd
#> 1/72                                                   
#> 2/72 [unnamed-chunk-9]                                 
#> 3/72                                                   
#> 4/72 [re_loading_pckgs]                                
#> 5/72                                                   
#> 6/72 [load table if they do not exist]                 
#> 7/72                                                   
#> 8/72 [results_li_top10]                                
#> 9/72                                                   
#> 10/72 [fun_arranged_boxplot]
#> 11/72                                                   
#> 12/72 [cor_dens_plot]
#> 13/72                                                   
#> 14/72 [cor_plot]
#> 15/72                                                   
#> 16/72 [stepwise_model]
#> 17/72                                                   
#> 18/72 [val_propre_pca]
#> 19/72                                                   
#> 20/72 [var_plot_pca]
#> 21/72                                                   
#> 22/72 [dim_desc_pca]                                    
#> 23/72                                                   
#> 24/72 [new_plot_ellipses]
#> 25/72                                                   
#> 26/72 [mean_standardise_results_li_by_dataset]          
#> 27/72                                                   
#> 28/72 [cor_dens_plot_standardise]
#> 29/72                                                   
#> 30/72 [cor_plot_standardise]
#> 31/72                                                   
#> 32/72 [stepwise_model_standardise ]                     
#> 33/72                                                   
#> 34/72 [val_propre_pca_standardise]
#> 35/72                                                   
#> 36/72 [var_plot_pca_standardise]
#> 37/72                                                   
#> 38/72 [dim_desc_pca_standardise]                        
#> 39/72                                                   
#> 40/72 [new_plot_ellipses_standardise]
#> 41/72                                                   
#> 42/72 [unnamed-chunk-10]                                
#> 43/72                                                   
#> 44/72 [MFA]                                             
#> 45/72                                                   
#> 46/72 [RV_coef]                                         
#> 47/72                                                   
#> 48/72 [plot_group]                                      
#> 49/72                                                   
#> 50/72 [plot_var]                                        
#> 51/72                                                   
#> 52/72 [unnamed-chunk-11]                                
#> 53/72                                                   
#> 54/72 [unnamed-chunk-12]                                
#> 55/72                                                   
#> 56/72 [prepare data]                                    
#> 57/72                                                   
#> 58/72 [lm model]                                        
#> 59/72                                                   
#> 60/72 [anova]                                           
#> 61/72                                                   
#> 62/72 [unnamed-chunk-13]                                
#> 63/72                                                   
#> 64/72 [Convert function-type columns to dummy variables]
#> 65/72                                                   
#> 66/72 [Run PCA]                                         
#> 67/72                                                   
#> 68/72 [Visualize PCA with Score Overlay ]               
#> 69/72                                                   
#> 70/72 [contributing components]                         
#> 71/72                                                   
#> 72/72 [pca ]
#> output file: 08_metaanalysis.knit.md
#> /home/github-runner/.conda/envs/hadaca3framework_env/bin/pandoc +RTS -K512m -RTS 08_metaanalysis.knit.md --to html4 --from markdown+autolink_bare_uris+tex_math_single_backslash --output 08_metaanalysis.html --lua-filter /home/github-runner/.conda/envs/hadaca3framework_env/lib/R/library/rmarkdown/rmarkdown/lua/pagebreak.lua --lua-filter /home/github-runner/.conda/envs/hadaca3framework_env/lib/R/library/rmarkdown/rmarkdown/lua/latex-div.lua --lua-filter /home/github-runner/.conda/envs/hadaca3framework_env/lib/R/library/rmarkdown/rmarkdown/lua/table-classes.lua --variable bs3=TRUE --standalone --section-divs --table-of-contents --toc-depth 3 --variable toc_float=1 --variable toc_selectors=h1,h2,h3 --variable toc_collapsed=1 --variable toc_smooth_scroll=1 --variable toc_print=1 --template /home/github-runner/.conda/envs/hadaca3framework_env/lib/R/library/rmarkdown/rmd/h/default.html --no-highlight --variable highlightjs=1 --number-sections --variable theme=bootstrap --mathjax --variable 'mathjax-url=https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML' --include-in-header /tmp/RtmpJ9kZYd/rmarkdown-str3183ef34b66a69.html
#> 
#> Output created: 08_metaanalysis.html

5 Visualisations of the top methods

5.1 top 5 best methods

5.2 top 5 worst methods

6 Visualisations of the different metrics

6.1 Paper figures

6.1.1 PP

6.1.2 FS

6.1.3 DE

6.1.4 LI

6.2 Aggregated scores

6.2.1 PP

6.2.2 FS

6.2.3 DE

6.2.4 LI

6.3 RMSE

6.3.1 PP

6.3.2 FS

#> Warning: Removed 2592 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 2592 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

6.3.3 DE

#> Warning: Removed 2592 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).
#> Warning: Removed 2592 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

6.3.4 LI

#> Warning: Removed 2592 rows containing non-finite outside the scale range
#> (`stat_ydensity()`).

6.4 Spearman correlation (row)

6.4.1 PP

6.4.2 FS

6.4.3 DE

6.4.4 LI